** Merge Data
* Should only be called from master file as directories are specified there

* Version history
* 4.9.19 Vojta: after field work completed


clear

** Set Filenames *************************************************************
* Base names (no extension) of the data files handled below. They are
* combined with $rawdatapath / $cleandatapath, which must already be set
* by the calling master file.

global participants            "participants_cn"
global participants_activities "participants_activities_cn"
global participants_whatsapp   "participants_whatsapp_cn"
global survey                  "survey"

** Prepare Data ****************************************************************

// Participants
// Imports the raw participants CSV, renames the key to participant_id,
// converts the string timestamps to Stata datetimes and saves a .dta copy.
{
import delimited "$rawdatapath/HK/$participants.csv", clear delimiter(";")
* Alternative renames kept for files whose first header carries BOM residue:
*rename ÿþid participant_id
*rename ïid participant_id
rename id participant_id
drop ind_id iatdatabasestructure //Not needed

* Convert time variables into time format.
* clock() returns %tc values (no leap seconds), so the display format has to
* be %tc as well; %tC is the leap-second-adjusted format that belongs to
* Clock() and would display times shifted by the accumulated leap seconds.
* The values are stored as double because float cannot hold millisecond
* datetimes exactly.
foreach var of varlist time_login-time_enter_round_2_page_8 round1_complete_time round2_complete_time {
gen double `var'_t=clock(`var', "20YMD hms")
format `var'_t %tc
drop `var'
rename `var'_t `var'
}

save "$cleandatapath/HK/$participants.dta", replace
}

// Participants_activities
// Reshapes the long activity file (one row per participant x table x round)
// into one row per participant and labels the resulting sort/choice columns.
{
import delimited "$rawdatapath/HK/$participants_activities.csv", delimiter(";") clear
*rename ÿþid id
*rename ïid id
drop you_a you_b other_a other_b id

* Build a single "<table>_<round>" key to serve as the reshape suffix
tostring table_id round_id, gen(tbl_str rnd_str)
gen table_round_id = tbl_str+"_"+rnd_str
drop tbl_str rnd_str table_id round_id

reshape wide sort choice , i(participant_id) j(table_round_id) string

* Label variables according to game outcomes: one payoff description per
* table, applied to both rounds of the sort and choice variables
local payoff1 "you_a 10 you_b 11 other_a 10 other_b 13"
local payoff2 "you_a 10 you_b 10 other_a 10 other_b 8"
local payoff3 "you_a 10 you_b 9 other_a 10 other_b 6"
local payoff4 "you_a 10 you_b 8 other_a 10 other_b 6"
local payoff5 "you_a 10 you_b 7 other_a 10 other_b 6"
local payoff6 "you_a 10 you_b 6 other_a 10 other_b 0"

forvalues tbl = 1/6 {
	foreach stub in sort choice {
		forvalues rnd = 1/2 {
			lab var `stub'`tbl'_`rnd' "`payoff`tbl''"
		}
	}
}

save "$cleandatapath/HK/$participants_activities.dta", replace
}

// Participants_whatsapp
// Imports the WhatsApp log, converts the timestamp, flags and measures
// messages, reduces the data to at most one row per participant and
// message status, and reshapes it to one row per participant.
{
import delimited "$rawdatapath/HK/$participants_whatsapp.csv", clear delimiter(";")
*drop ÿþid 
*drop ïid
drop id

* Convert time variables into time format.
* The datetime must be generated as double: a float cannot represent
* millisecond datetimes and silently rounds them. clock() yields %tc values,
* so %tc (not the leap-second format %tC) is the matching display format.
gen double date_time_t=clock(date_time, "DMYhm") 
format date_time_t %tc
drop date_time
rename date_time_t date_time


* Generate binary variable for message
generate message_yesno = .
replace message_yesno=0 if message==""
replace message_yesno=1 if message!=""

* Generate variable that measures length of message
gen message_length = .
replace message_length = strlen(message) if message_yesno==1

* Drop duplicates [[[THIS NEEDS TO BE CHECKED; DROPS INFORMATION]]]
* Keep one row per participant and message status. Sorting additionally on
* date_time with the stable option makes the surviving row deterministic
* (the earliest entry); sorting on the two keys alone breaks ties at random,
* so a different row could be kept on every run.
sort participant_id message_yesno date_time, stable
by participant_id message_yesno: keep if _n==1

* Reshaped using message_yesno as id
reshape wide message date_time download_pic message_length, i(participant_id) j(message_yesno)

save "$cleandatapath/HK/$participants_whatsapp.dta", replace
}

// Survey
// Imports the long survey export, derives participant_id from username,
// maps question names to short codes and reshapes to one row per participant.
{
*import delimited "$rawdatapath/190415/$survey.csv", clear delimiter(tab) encoding ("utf-8")
import delimited "$rawdatapath/HK/$survey.csv", delimiter(",") clear
drop v6 v7 v8 v9

* username is split on "-"; the first piece becomes participant_id
split username, parse("-")
*drop username3 username4 username5 username runid sectionindex
drop username runid sectionindex
rename username1 participant_id

* Discard rows whose participant_id is not a number, then make it numeric
drop if real(participant_id)==.
destring participant_id, replace

* Drop data if username is not translatable
/*
gen lep=length(participant_id)
gen les=length(username2)
drop if lep>4 | les>1 | les==0
drop lep les
*/

drop username2 username3 username4

* quantify questions (e.g. 2_1 = question 2 in survey 1)
* raw_names and codes are parallel lists: the k-th raw question name is
* recoded to the k-th code; any other question keeps "na" and is dropped.

local raw_names dw_contractlength dw_minwage rights_employee rights_employer ///
	dw_typicalcountries dw_everemploy dw_future dw_philippines ///
	match_real match_country match_remember ///
	dwimagine_payontime dwimagine_mobileaway dwimagine_reducesalary dwimagine_slap ///
	dwimagine_trust dwimagine_respect dwimagine_leavehouse dwimagine_nobreak ///
	filipino_view philippines_visit belief_sent
local codes 1_1 2_1 3_1 4_1 5_1 6_1 7_1 8_1 ///
	1_2 2_2 3_2 ///
	11_3 12_3 13_3 14_3 15_3 16_3 17_3 18_3 ///
	2_3 3_3 4_3

gen question_num ="na"
local n_questions : word count `raw_names'
forvalues k = 1/`n_questions' {
	local nm : word `k' of `raw_names'
	local cd : word `k' of `codes'
	replace question_num = "`cd'" if question=="`nm'"
}

drop question
rename question_num question

drop if question=="na"

duplicates drop participant_id question, force // participant_id 73689 has multiple entries. clarify!

/*
**************************************
**v6,7,8 and 9 not unique by id (as many just missing). Duplicating the values in each id, if missing.
forvalues j=6/9{
forvalues i=1/15{
bysort participant_id : replace v`j'=v`j'[_n-1] if missing(v`j')
bysort participant_id : replace v`j'=v`j'[_n+1] if missing(v`j')
}
}
****Still some not uniqe. For now, those will just be changed. Unclear how to proceed in the end.
forvalues j=6/9{
forvalues i=1/15{
bysort participant_id : replace v`j'=v`j'[_n-1] 
}
}
**************************************
*/

* One row per participant, one response<code> column per question
reshape wide response, i(participant_id) j(question) string
destring participant_id, replace

* quantify responses
* Each string answer is replaced by a numeric code that follows the order of
* the answer list; answers not in the list stay missing.

* Question 1_1
gen coded=.
local lvl = 0
foreach ans in 6month 1year 2years 5years indefinite {
	local ++lvl
	replace coded = `lvl' if response1_1 =="`ans'"
}
drop response1_1
rename coded response1_1
lab define res11 1 "6 months" 2 "1 year" 3 "2 years" 4 "5 years" 5 "indefinite"
lab values response1_1 res11
lab var response1_1 "How long does a typical foreign domestic worker contract last?"

* Question 2_1
gen coded=.
local lvl = 0
foreach ans in 100 400 700 1000 {
	local ++lvl
	replace coded = `lvl' if response2_1 =="`ans'"
}
drop response2_1
rename coded response2_1
lab define res21 1 "100 US Dollars" 2 "400 US Dollars" 3 "700 US Dollars" 4 "1000 US Dollars" 
lab values response2_1 res21
lab var response2_1 "How much is a minimal monthly salary of a foreign domestic worker"

* Question 3_1
* Multi-select answer stored as ":"-separated tokens. One 0/1 indicator is
* created per option; respondents with an empty answer are set to missing.
lab define resyesno 0 "No" 1 "Yes"

local flags  med_care contact_fam rest terminate_contr
local tokens medical_care contact_family rest terminate_contract

foreach flag of local flags {
	gen response3_1_`flag'=0
	lab val response3_1_`flag' resyesno
}
lab var response3_1_med_care "Does DW have right to basic medical care in host country?"
lab var response3_1_contact_fam "Does DW have right to contact their family?"
lab var response3_1_rest "Does DW have right to rest two days per week?"
lab var response3_1_terminate_contr "Does DW have right to terminate contract without giving areason (1 month in advance)?"

* Number of selected options per respondent; the maximum bounds the loops
gen n_picked = wordcount(subinstr(response3_1,":"," ",10))
quietly summarize n_picked
local N = r(max)

split response3_1, parse(":")
forvalues i=1/`N' {
	forvalues k=1/4 {
		local flag  : word `k' of `flags'
		local token : word `k' of `tokens'
		replace response3_1_`flag'=1 if response3_1`i'=="`token'"
	}
}

foreach flag of local flags {
	replace response3_1_`flag'=. if n_picked==0
}
drop n_picked response3_1

forvalues i=1/`N' {
	drop response3_1`i'
}

* Question 4_1
* Same multi-select handling as question 3_1

local flags  terminate_noreason reduce_payment terminate_absent
local tokens terminate_noreason reduce_payment terminate_absenteeism

foreach flag of local flags {
	gen response4_1_`flag'=0
	lab val response4_1_`flag' resyesno
}

lab var response4_1_terminate_noreason "Does the employer have the right to terminate the contract without any reason (1 month in advance)?"
lab var response4_1_reduce_payment "Does the employer have the right to delay or reduce payment to the domestic worker in case of misconduct?"
lab var response4_1_terminate_absent "Does the employer have the right to terminate the contract immediately in case of frequent absenteeism?"

gen n_picked = wordcount(subinstr(response4_1,":"," ",10))
quietly summarize n_picked
local N = r(max)

split response4_1, parse(":")
forvalues i=1/`N' {
	forvalues k=1/3 {
		local flag  : word `k' of `flags'
		local token : word `k' of `tokens'
		replace response4_1_`flag'=1 if response4_1`i'=="`token'"
	}
}

foreach flag of local flags {
	replace response4_1_`flag'=. if n_picked==0
}
drop response4_1 n_picked

forvalues i=1/`N' {
	drop response4_1`i'
}

* Question 5_1
lab var response5_1 "Three countries of origin"

* Question 6_1
* Answers are coded 1..4 in the order listed; anything else stays missing
gen coded=.
local lvl = 0
foreach ans in yes_past yes_now yes_more no {
	local ++lvl
	replace coded = `lvl' if response6_1 =="`ans'"
}
drop response6_1
rename coded response6_1
lab define res61 1 "Yes, in the past" 2 "Yes, currently employing one" 3 "Yes, currently employing more than one" ///
4 "No, never" 
lab values response6_1 res61
lab var response6_1 "Did your household ever employ a foreign domestic worker?"

* Questions 7_1, 8_1 and 1_2
* Plain yes/no items: "no" -> 0, "yes" -> 1, anything else missing
foreach q in 7_1 8_1 1_2 {
	gen coded=.
	replace coded = 0 if response`q' =="no"
	replace coded = 1 if response`q' =="yes"
	drop response`q'
	rename coded response`q'
	lab values response`q' resyesno
}
lab var response7_1 "Would you consider hiring a foreign domestic worker?"
lab var response8_1 "Was any of the domestic workers you employed from the Philippines?"
lab var response1_2 "Is the person you are matched with a real person?"

* Question 2_2
label variable response2_2 "In which country does the person matched with you live?"

* Question 3_2
label variable response3_2 "What else do you remember about the person matched with you?"


* Question 1_3
* The eight statements arrive as response11_3 ... response18_3, in the order
* of the item list below; they are renamed, made numeric, recentred from
* 1..5 to -2..2 and combined into a composite index.
local items payontime mobileaway reducesalary slap trust respect leavehouse nobreak

local idx = 10
foreach item of local items {
	local ++idx
	rename response`idx'_3 response1_3_`item'
}

lab var response1_3_payontime "It’s very important that I pay her salary always on time"
lab var response1_3_mobileaway "If the person is lazy, taking away her mobile phone for some time is an appropriate method to make her work harder"
lab var response1_3_reducesalary "If the person is lazy, reducing her salary is an appropriate method to make her work harder"
lab var response1_3_slap "If the person is careless and breaks some dishes, a slap in the face is an appropriate punishment"
lab var response1_3_trust "I can trust this person in my household"
lab var response1_3_respect  "I would do everything to ensure that all other household members treat her respectfully"
lab var response1_3_leavehouse "It’s okay for me if she leaves the house on her own in her spare time"
lab var response1_3_nobreak "I expect her to work every day of the week."

foreach item of local items {
	destring response1_3_`item', replace
}

recode response1_3* (1=-2) (2=-1) (3=0) (4=1) (5=2)
lab define res13 -2 "Not at all agree" -1 "Not much agree" 0 "Neutral" 1 "Somewhat agree" 2 "Strongly agree"
lab values response1_3* res13

* Composite index: mobileaway, reducesalary, slap and nobreak enter negatively
gen filipino_statements=response1_3_payontime-response1_3_mobileaway-response1_3_reducesalary-response1_3_slap+response1_3_trust+response1_3_respect+response1_3_leavehouse-response1_3_nobreak
label variable filipino_statements "Composite index for questions 3.1a-h (b, c, d, h reverse coded)"
order filipino_statements, before(response1_3_payontime)

* Disabled parser for an export layout in which all eight answers were
* packed into a single response1_3 string
/*
split response1_3, parse(":")
foreach i of numlist 1/8 {
	split response1_3`i', parse("=")
	drop response1_3`i' 
	destring response1_3`i'2, replace
}
foreach i of numlist 1/8 {
	replace response1_3_payontime = response1_3`i'2 if response1_3`i'1 == "dwimagine_payontime"
	replace response1_3_mobileaway = response1_3`i'2 if response1_3`i'1 == "dwimagine_mobileaway"
	replace response1_3_reducesalary = response1_3`i'2 if response1_3`i'1 == "reducesalary"
	replace response1_3_slap = response1_3`i'2 if response1_3`i'1 == "dwimagine_slap"
	replace response1_3_trust = response1_3`i'2 if response1_3`i'1 == "dwimagine_trust"
	replace response1_3_leavehouse = response1_3`i'2 if response1_3`i'1 == "dwimagine_leavehouse"
	replace response1_3_nobreak = response1_3`i'2 if response1_3`i'1 == "dwimagine_nobreak"
}

foreach i of numlist 1/8 {
	drop response1_3`i'1 response1_3`i'2 
}
drop response1_3
*/
	
* Question 2_3
* String answers "1".."5" become the numbers 1..5; anything else is missing
gen coded=.
forvalues lvl = 1/5 {
	replace coded = `lvl' if response2_3 =="`lvl'"
}
lab define res23 1 "Very positively" 2 "Somewhat positively" 3 "Neutral" ///
4 "Somewhat negatively" 5 "Very negatively"
drop response2_3
rename coded response2_3
lab variable response2_3 "In which of the following ways do you view Filipino people?"
lab values response2_3 res23

* Question 3_3
gen coded=.
replace coded = 0 if response3_3 =="no"
replace coded = 1 if response3_3 =="yes"
drop response3_3
rename coded response3_3
lab variable response3_3 "Have you ever visited the Philippines?"
lab values response3_3 resyesno

* Question 4_3
* 99 is the "don't know" code (see the label) and is recoded to missing
destring response4_3, replace
lab variable response4_3 "How much did you send to the other person in the first round in the first activity? (DK=99)"
recode response4_3 (99=.)

save "$cleandatapath/HK/survey.dta", replace
}

// Photo names
// Converts the photo-name workbook to .dta; picture_id is renamed to
// match_id, the key used when this file is merged onto the participants.
{
import excel using "$rawdatapath/HK/phl_photo_names.xlsx", sheet("Sheet1") firstrow clear

rename (picture_id) (match_id)

save "$cleandatapath/HK/phl_photo_names.dta", replace
}

// Demographics survey
{
import excel using "$rawdatapath/HK/HK_Round_1.xlsx", clear

* The sheet is read without firstrow, so the columns are named A ... AJ.
* Give them their real names, one spreadsheet column per new name.
rename (A B C D E F G H I J K L M N O) ///
	(responseid respid status GCWaveNo RespStatus src DeviceType userid password email ourlink clientid indid roundid round2id)
rename (P Q R S T U V W X Y Z) ///
	(Q1_1 Q1_2 Q1_3 Q1_4 Q2 Q3 Q4 Q5 Q6a Q6b Q7)
rename (AA AB AC AD AE AF AG AH AI AJ) ///
	(Q8 Q9_1 Q9_2 Q9_3 Q9_4 Q10 intdurMinutes SSpid interview_start interview_end)

* The first row only repeats the variable names; remove it and then turn
* every column that holds numbers into a numeric variable
drop if _n==1
destring, replace

//label variables
// Attaches variable and value labels to the demographics items and
// harmonises Q7 and Q9_2 with the categories used for the other countries.

label variable DeviceType "The type of device the respondent used to complete the survey"
label define DeviceType1 1 "Desktop" 2 "Mobile" 3 "Tablet"
label values DeviceType DeviceType1

label variable Q1_1 "Do you or any of your family members work in the news sector"
label variable Q1_2 "Do you or any of your family members work in a market research company"
label variable Q1_3 "Do you or any of your family members work in a advertising company"
label variable Q1_4 "None of the above (Q1_1-Q1_3)"
* Each Q1 item gets its own No/Yes value label (Q1_11 ... Q1_41)
foreach q in Q1_1 Q1_2 Q1_3 Q1_4 {
	label define `q'1 0 "No" 1 "Yes"
	label values `q' `q'1
}

label variable Q2 "Respondent's age"
label define Q21 1 "Below 18" 2 "18-24" 3 "25-29" 4 "30-34" 5 "35-39" 6 "40-44" 7 "45-49" 8 "50-54" 9 "55-59" 10 "60-64" 11 "64 above"
label values Q2 Q21

label variable Q3 "Gender"
label define Q31 1 "Male" 2 "Female"
label values Q3 Q31

label variable Q4 "What is your working status"
label define Q41 1 "Full-Time (30+ hours a week per year)" 2 "Part-Time (29 hours or less a week per year)" 3 "Self-employed" 4 "Unemployed and seeking work" 5 "Full-time parents" 6 "Full-time education" 7 "retired"
label values Q4 Q41

label variable Q5 "Level of education"
label define Q51 1 "Primary school or below" 2 "Completed primary school" 3 "Started college but not yet finished" 4 "College graduate" 5 "Post-secondary education" 6 "Bachelor degree or above" 
label values Q5 Q51

* Income brackets. The dollar signs are written with a leading backslash:
* inside double quotes an unescaped dollar sign starts a global macro
* reference, so the amount directly after it would be swallowed and the
* label text would lose part of each figure.
label variable Q6a "Monthly personal income"
label define Q6a1 1 "No income" 2 "Below HK\$5,000" 3 "HK\$5,000-7,999" 4 "HK\$8,000-9,999" 5 "HK\$10,000-14,999" 6 "HK\$15,000-19,999" 7 "HK\$20,000-24,999" 8 "HK\$25,000-29,999" 9 "HK\$30,000-34,999" 10 "HK\$35,000-39,999" 11 "HK\$40,000-44,999" 12 "HK\$45,000-49,999" 13 "HK\$50,000-54,999" 14 "HK\$55,000-59,999" 15 "HK\$60,000-64,999" 16 "HK\$65,000-69,999" 17 "HK\$70,000-74,999" 18 "HK\$75,000-79,999" 19 "Above HK\$80,000" 
label values Q6a Q6a1

* Household income uses the same brackets; copy the label so both stay in sync
label variable Q6b "Monthly household income"
label copy Q6a1 Q6b1
label values Q6b Q6b1

label variable Q7 "Marital status"
label define Q71 1 "Single or not married" 2 "Married" 4 "Divorced or widowed" 5 "Others" 
label values Q7 Q71
* to make it consistent with categories in KSA/UAE
replace Q7=2 if Q7==3

label variable Q8 "How many people are currently living with you"
label define Q81 1 "1" 2 "2" 3 "3" 4 "4" 5 "5" 6 "6" 7 "7" 8 "8 or above" 
label values Q8 Q81

label variable Q9_1 "Do you live with your children? If so, how old are they (Multiple Answer) Children live by themselves"

label variable Q9_2 "Do you live with your children? If so, how old are they (Multiple Answer) Independent children under the age of 18"
* To make the categories consistent across countries: top-code at 5 and
* treat a missing answer as no children under 18
replace Q9_2=5 if Q9_2>5 & Q9_2!=.
label define Q921 0 "None" 1 "One" 2 "Two" 3 "Three" 4 "Four" 5 "More than Four"
label values Q9_2 Q921
replace Q9_2=0 if Q9_2==.

label variable Q9_3 "Do you live with your children? If so, how old are they (Multiple Answer) Independent children between the ages of 18 and 25"

label variable Q9_4 "Do you live with your children? If so, how old are they (Multiple Answer) Independent children above the age of 25"

label variable Q10 "We conducted this survey in collaboration with the Asian Institute of Management (Georgetown University), the University of Michigan (University of Michigan), the University of Munich (University of Munich) and the University of St. Andrews (St. Andrews University). In the next part of the questionnaire, we will continue by connecting you to the platform of the university"
label define Q101 1 "Agree to participate" 2 "Do not agree to participate"
label values Q10 Q101

* HH income + PPP in 2018 PPP prices for the minimum values in each interval from https://data.worldbank.org/indicator/PA.NUS.PPP
* Each income bracket is mapped to its lower bound in HKD divided by the
* conversion factor 5.92; brackets 1 and 2 have a lower bound of zero.
* NOTE(review): comment and label speak of household income, but the source
* variable is Q6a, which this file labels "Monthly personal income" (Q6b is
* labelled "Monthly household income") - confirm which one is intended.
gen PPP_2018_min_int=.
label variable PPP_2018_min_int "Monthly household income in 2018 PPP USD for the minimum values in each interval"
replace PPP_2018_min_int = 0 if inlist(Q6a, 1, 2)

* Lower bounds of brackets 3 ... 19, in bracket order
local bracket = 2
foreach lower in 5000 8000 10000 15000 20000 25000 30000 35000 40000 45000 50000 55000 60000 65000 70000 75000 80000 {
	local ++bracket
	replace PPP_2018_min_int = `lower'/5.92 if Q6a == `bracket'
}

* Tag the country, keep only the variables used in the analysis and give
* them descriptive names
gen country="HK"
label variable country "Country"


* DeviceType is spelled out in full: the abbreviation "Device" only resolves
* while variable abbreviation is switched on and stops with an error under
* "set varabbrev off"
keep userid Q2 Q3 Q4 Q5 PPP_2018_min_int Q7 Q9_2 country DeviceType
rename (Q2 Q3 Q4 Q5 PPP_2018_min_int Q7 Q9_2) ///
	(age gender employment_status education income marital_status children)

order userid gender age education employment_status income marital_status children country DeviceType, first
* userid is the key shared with the other participant files
rename userid participant_id

// Generate dummies for respective demographics
// Every indicator is 1/0 when its source variable is observed and missing
// when the source is missing. Without the "if !missing()" guard the
// expression "x==k" evaluates to 0 for a missing x, which would code
// non-respondents as a definite "No".
{
* resyesno is attached to all dummies below. Define it here as well so the
* saved demographics file carries the label definition itself and does not
* depend on another data set supplying it at merge time.
label define resyesno 0 "No" 1 "Yes", replace

gen female=gender==2 if !missing(gender)
label variable female "Female = 1"
order female, after(gender)

* Age bands 18-24 ... 60-64 correspond to age codes 2 ... 10
local code = 1
foreach band in 18_24 25_29 30_34 35_39 40_44 45_49 50_54 55_59 60_64 {
	local ++code
	local pretty = subinstr("`band'", "_", "-", .)
	gen age_`band'=age==`code' if !missing(age)
	label variable age_`band' "Age range `pretty' = 1"
}
gen age_65_above=age==11 if !missing(age)
label variable age_65_above "Age range 64 and above = 1"
order age_18_24 age_25_29 age_30_34 age_35_39 age_40_44 age_45_49 age_50_54 age_55_59 age_60_64 age_65_above, after(age)

gen education_primary_below=education==1 if !missing(education)
label variable education_primary_below "Primary school or below = 1"
gen education_primary_complete=education==2 if !missing(education)
label variable education_primary_complete "Completed primary school = 1"
gen education_college_incomplete=education==3 if !missing(education)
label variable education_college_incomplete "Started college but not yet finished = 1"
gen education_college_grad=education==4 if !missing(education)
label variable education_college_grad "College graduate = 1"
gen education_post_secondary=education==5 if !missing(education)
label variable education_post_secondary "Post-secondary education = 1"
gen education_bachelor_above=education==6 if !missing(education)
label variable education_bachelor_above "Bachelor degree or above = 1"
order education_primary_below education_primary_complete education_college_incomplete education_college_grad education_post_secondary education_bachelor_above, after(education)

gen employment_fulltime=employment_status==1 if !missing(employment_status)
label variable employment_fulltime "Full-Time (30+ hours a week per year) = 1"
gen employment_parttime=employment_status==2 if !missing(employment_status)
label variable employment_parttime "Part-Time (29 hours or less a week per = 1"
gen employment_selfemployed=employment_status==3 if !missing(employment_status)
label variable employment_selfemployed "Self-employed = 1"
gen employment_unemployed=employment_status==4 if !missing(employment_status)
label variable employment_unemployed "Unemployed and seeking work = 1"
gen employment_parent=employment_status==5 if !missing(employment_status)
label variable employment_parent "Full-time parents = 1"
gen employment_student=employment_status==6 if !missing(employment_status)
label variable employment_student "Full-time education = 1"
gen employment_retired=employment_status==7 if !missing(employment_status)
label variable employment_retired "Retired = 1"
order employment_fulltime employment_parttime employment_selfemployed employment_unemployed employment_parent employment_student employment_retired, after(employment_status)

gen marital_single=marital_status==1 if !missing(marital_status)
label variable marital_single "Single or not married = 1"
gen marital_married=marital_status==2 if !missing(marital_status)
label variable marital_married "Married = 1"
gen marital_divorce_widow=marital_status==4 if !missing(marital_status)
label variable marital_divorce_widow "Divorced or widowed = 1"
gen marital_other=marital_status==5 if !missing(marital_status)
label variable marital_other "Other marital status = 1"
order marital_single marital_married marital_divorce_widow marital_other, after(marital_status)

gen children_none=children==0 if !missing(children)
label variable children_none "No children = 1"
gen children_one=children==1 if !missing(children)
label variable children_one "One child = 1"
gen children_two=children==2 if !missing(children)
label variable children_two "Two children = 1"
gen children_three=children==3 if !missing(children)
label variable children_three "Three children = 1"
gen children_four=children==4 if !missing(children)
label variable children_four "Four children = 1"
gen children_fiveplus=children==5 if !missing(children)
label variable children_fiveplus "Five children and more = 1"
gen children_dummy=children>0 if !missing(children)
label variable children_dummy "Any children = 1"
order children_none children_one children_two children_three children_four children_fiveplus children_dummy, after(children)

* The list is spelled out on purpose: wildcards such as employment_* or
* marital_* would also pick up employment_status and marital_status
foreach var of varlist female age_18_24 age_25_29 age_30_34 age_35_39 age_40_44 age_45_49 age_50_54 age_55_59 age_60_64 age_65_above education_primary_below education_primary_complete education_college_incomplete education_college_grad education_post_secondary education_bachelor_above employment_fulltime employment_parttime employment_selfemployed employment_unemployed employment_parent employment_student employment_retired marital_single marital_married marital_divorce_widow marital_other children_none children_one children_two children_three children_four children_fiveplus children_dummy {
	lab values `var' resyesno
	// there are some instances where income is missing while other demographics are collected
	// ID=9501887 has gender and age data but no other demographics; drop these; only completed round 1, so not used in main analysis anyways 
	replace `var'=. if education==.
	}
}

save "$cleandatapath/HK/phl_demographics_cn.dta", replace
}

** Merge Data ******************************************************************

* Build the participant-level file: start from the participants table and
* attach each cleaned source in turn. No _merge variable is kept.

use "$cleandatapath/HK/$participants.dta", clear

* Participant-level sources (one row per participant_id in each file).
merge 1:1 participant_id using "$cleandatapath/HK/$participants_activities.dta", nogenerate

merge 1:1 participant_id using "$cleandatapath/HK/$participants_whatsapp.dta", nogenerate

merge 1:1 participant_id using "$cleandatapath/HK/survey.dta", nogenerate

merge 1:1 participant_id using "$cleandatapath/HK/iat_user_dscore.dta", nogenerate

* Photo/name information is keyed on match_id and may be shared by several
* participants. Photo rows with no participant are not kept: they would enter
* with participant_id missing, and two or more such rows would make the
* following 1:1 merge on participant_id fail because the key is no longer
* unique. They carry no participant data.
* NOTE(review): this assumes phl_photo_names.dta has no language variable, so
* such rows would have been removed by the later language=="cn" filter anyway.
merge m:1 match_id using "$cleandatapath/HK/phl_photo_names.dta", keep(master match) nogenerate

merge 1:1 participant_id using "$cleandatapath/HK/phl_demographics_cn.dta", nogenerate


* Cleaning up the dataset

* Egalitarian default variable
* d_eq is the logical negation of default (1 when default==0, 0 otherwise).
* The negation is restricted to non-missing values: Stata treats missing as
* "true", so an unrestricted !default would turn a missing default into 0
* instead of leaving it missing. The other indicators in this file also keep
* missing as missing.
gen d_eq = !default if !missing(default)
label variable d_eq "Default egalitarian = 1"
label values d_eq resyesno
order d_eq, after(default)
drop default

* Reordering survey data
* Move the survey items so that each survey's questions sit together. The
* statements run in this sequence because each one positions variables
* relative to where the previous one left them.
order response5_1, before(response6_1)
order response1_2 response2_2 response3_2, after(response8_1)
order response4_3, after(response3_3)
order filipino_statements ///
	response1_3_payontime response1_3_mobileaway response1_3_reducesalary ///
	response1_3_slap response1_3_trust response1_3_respect ///
	response1_3_leavehouse response1_3_nobreak, before(response2_3)

* Empty string variables used purely as visual section dividers in the
* variable list (SURVEY_0 ends up immediately before SURVEY_1).
forvalues s = 0/3 {
	generate SURVEY_`s'=""
}

order SURVEY_1, before(response1_1)
order SURVEY_2, before(response1_2)
order SURVEY_3, before(response1_3_payontime)
order SURVEY_0, before(SURVEY_1)

/* Rename survey variables so they match the PAP
rename response1_1 survey1_1 
rename response2_1 survey1_2
rename response3_1_med_care survey1_3a
rename response3_1_contact_fam survey1_3b
rename response3_1_rest survey1_3c
rename response3_1_terminate_contr survey1_3d
rename response4_1_terminate_noreason survey1_4a
rename response4_1_reduce_payment survey1_4b
rename response4_1_terminate_absent survey1_4c
rename response5_1 survey1_5
rename response6_1 survey1_6
rename response7_1 survey1_7 */

* Shorten survey variable names
* Step 1: replace the "response" prefix with "q" (response3_1_rest -> q3_1_rest).
foreach old of varlist response* {
	local short : subinstr local old "response" "q", all
	rename `old' `short'
}

* Step 2: switch the index order. Raw names are q<question>_<survey>; final
* names are q<survey>_<question>. Names with equal indices (q1_1, q2_2, q3_3)
* are already final. Pairs that trade names go through a temporary name.

* Survey 1 / survey 2 swap: q2_1 <-> q1_2
rename q2_1 hq1_2
rename q1_2 q2_1
rename hq1_2 q1_2

* Survey 1, questions 3-8
rename q3_1_med_care q1_3a
rename q3_1_contact_fam q1_3b
rename q3_1_rest q1_3c
rename q3_1_terminate_contr q1_3d
rename q4_1_terminate_noreason q1_4a
rename q4_1_reduce_payment q1_4b
rename q4_1_terminate_absent q1_4c
rename q5_1 q1_5
rename q6_1 q1_6
rename q7_1 q1_7
rename q8_1 q1_8

* Survey 2 / survey 3 swap: q3_2 <-> q2_3
rename q3_2 hq2_3
rename q2_3 q3_2
rename hq2_3 q2_3

* Survey 3, question 1 battery
foreach item in payontime mobileaway reducesalary slap trust respect leavehouse nobreak {
	rename q1_3_`item' q3_1_`item'
}

* Survey 3, question 3 already has its final name. The script previously
* renamed it to itself; assert that it exists instead, so a missing variable
* still stops the script here.
confirm variable q3_3
rename q4_3 q3_4

* DICTATOR GAME TRANSFER variables
* Transfer = 20 minus the recorded activity-1 choice of the round.
* NOTE(review): presumably the choice is the amount kept out of an endowment
* of 20 - confirm against the experiment design.
forvalues rnd = 1/2 {
	generate dg_transfer`rnd' = 20-round`rnd'_activity1_choice
	lab var dg_transfer`rnd' "Transfer to Filipina `rnd'"
}
order dg_transfer1 dg_transfer2, after(d_eq)

* The raw choices are replaced by the transfer variables.
drop round1_activity1_choice round2_activity1_choice

* BINARY DICTATOR GAME ANTISOCIAL CHOICES variables
* For games 2-6 in each round, copy choice<game>_<round> into a labelled
* bg_<game>_antisocial<round> variable, then place the round's five variables
* right after that round's dictator game transfer.
forvalues rnd = 1/2 {
	local bg_vars
	forvalues game = 2/6 {
		local bg bg_`game'_antisocial`rnd'
		gen `bg'=choice`game'_`rnd'
		label variable `bg' "Game `game', round `rnd', antisocial = 1"
		label values `bg' resyesno
		local bg_vars `bg_vars' `bg'
	}
	order `bg_vars', after(dg_transfer`rnd')
}

* ANTISOCIAL EXTENT variables
* Per round: sum of the five binary-game antisocial indicators (games 2-6).
* A plain "+" sum is used, so any missing component makes the total missing.
forvalues rnd = 1/2 {
	local total "bg_2_antisocial`rnd'"
	forvalues game = 3/6 {
		local total "`total'+bg_`game'_antisocial`rnd'"
	}
	gen antisocial_extent`rnd'=`total'
	replace antisocial_extent`rnd'=. if bg_2_antisocial`rnd'==.
	label variable antisocial_extent`rnd' "Sum of antisocial choices in binary dictator games, round `rnd'"
	order antisocial_extent`rnd', before(bg_2_antisocial`rnd')
}
	
* EGALITARIAN variables
* Per round: 1 when all six binary-game choices (games 1-6) equal 0, else 0.
* Set to missing when the game-1 choice of that round is missing.
forvalues rnd = 1/2 {
	local all_zero "(choice1_`rnd'==0)"
	forvalues game = 2/6 {
		local all_zero "`all_zero' & (choice`game'_`rnd'==0)"
	}
	gen egalitarian`rnd'=`all_zero'
	replace egalitarian`rnd'=. if choice1_`rnd'==.
	label variable egalitarian`rnd' "Egalitarian type = 1, round `rnd'"
	label values egalitarian`rnd' resyesno
	order egalitarian`rnd', after(antisocial_extent`rnd')
}

* IAT D-score
* Rename the merged-in dscore to d_score, label it and place it directly
* before round1_complete.
rename dscore d_score
label variable d_score "IAT D-score"
order d_score, before(round1_complete)

	
* TREATMENT variables
* Raw treatment<round> codes at this point: 1 = control, 2 = photo, 3 = gift,
* 4 = photo + gift.

* Per-round arm indicators (C1, P1, G1, PG1, C2, ...). Each one is missing
* when the round's treatment is missing.
local desc_C  "Control"
local desc_P  "Photo"
local desc_G  "Gift"
local desc_PG "Photo + gift"

forvalues j = 1/2 {
	local code = 0
	foreach arm in C P G PG {
		local ++code
		gen `arm'`j' = treatment`j'==`code' if treatment`j'!=.
		label variable `arm'`j' "`desc_`arm'' treatment = 1, round `j'"
		label values `arm'`j' resyesno
	}
}

* Treatment-sequence indicators (round 1 arm followed by round 2 arm).
* NOTE(review): unlike the per-round indicators these are 0, not missing,
* when a treatment code is missing - confirm that this is intended.
gen CC  = treatment1==1 & treatment2==1
label variable CC "Round 1: Control, Round 2: Control"
gen PC  = treatment1==2 & treatment2==1
label variable PC "Round 1: Photo, Round 2: Control"
gen GC  = treatment1==3 & treatment2==1
label variable GC "Round 1: Gift, Round 2: Control"
gen PGC = treatment1==4 & treatment2==1
label variable PGC "Round 1: Photo + Gift, Round 2: Control"
gen CP  = treatment1==1 & treatment2==2
label variable CP "Round 1: Control, Round 2: Photo"
gen CG  = treatment1==1 & treatment2==3
label variable CG "Round 1: Control, Round 2: Gift"
gen CPG = treatment1==1 & treatment2==4
label variable CPG "Round 1: Control, Round 2: Photo + Gift"
label values CC PC GC PGC CP CG CPG resyesno

* Shift the raw codes to start at 0 and attach value labels. This must come
* after the indicators above, which rely on the original 1-4 coding.
forvalues j = 1/2 {
	replace treatment`j' = treatment`j'-1
}
lab define l_treat 0 "Control" 1 "Photo" 2 "Gift" 3 "Gift + photo"
lab val treatment1 treatment2 l_treat

order C1 P1 G1 PG1 C2 P2 G2 PG2 CC PC GC PGC CP CG CPG, after(treatment2)
	
* COUNTRY indicators (HONG KONG, KSA, UAE)
* HK is derived from the interface language. KSA and UAE are constant 0 in
* this file.
gen HK=language=="cn"
label variable HK "Hong Kong participant = 1"

gen KSA=0
label variable KSA "Saudi Arabia participant = 1"

gen UAE=0
label variable UAE "United Arab Emirates participant = 1"

label values HK KSA UAE resyesno

* Resulting column order: language UAE KSA HK
order UAE KSA HK, after(language)



* ROUND DURATION variable
* Time between entering page 2 and entering page 8 of a round. The difference
* is divided by 60000 to express it in minutes, as the variable label states.
forvalues rnd = 1/2 {
	local t_start time_enter_round_`rnd'_page_2
	local t_end   time_enter_round_`rnd'_page_8
	gen round`rnd'_duration=(`t_end'-`t_start')/60000
	label variable round`rnd'_duration "Round `rnd' duration (in min)"
	order round`rnd'_duration, after(round`rnd'_complete)
}

* Sum over both rounds; missing if either round's duration is missing.
gen total_time=round1_duration+round2_duration
label variable total_time "Total duration both rounds (in min)"
order total_time, after(round2_duration)

* ROUND COMPLETE variables

*************************************
* The completion flags are stored as strings at this point. Rows whose
* round1_complete holds a zero timestamp instead of a flag are dropped, then
* both flags are converted to numeric.
drop if round1_complete=="0000-00-00 00:00:00"
destring round1_complete round2_complete, replace
*************************************

label variable round1_complete "Round 1 complete = 1"
label variable round2_complete "Round 2 complete = 1"
label values round1_complete round2_complete resyesno

* MATCH REAL
* 1 when survey item q2_1 equals 1, 0 for any other answer, missing when the
* item is missing.
generate match_real = (q2_1==1) if q2_1!=.
label variable match_real "Matched person is real = 1"
label values match_real resyesno
order match_real, after(SURVEY_2)

* Status variable
* Progress through the experiment, from the login timestamp, the round-2
* page-2 timestamp and the two completion flags. Later assignments overwrite
* earlier ones. Status stays missing for rows with no login time and no
* completed round.
gen status = 1 if time_login<. & round1_complete!=1
replace status = 2 if round1_complete==1
replace status = 3 if round1_complete==1 & time_enter_round_2_page_2<. & round2_complete!=1
replace status = 4 if round2_complete==1

lab define l_status 1 "Incomplete round 1" 2 "Complete round 1, did not start round 2" 3 "Complete round 1, incomplete round 2" 4 "Both rounds complete"
lab val status l_status

* Attrition is defined only for participants who completed round 1
* (status 2-4): 1 if they did not finish round 2, 0 if they finished both.
* The status values are tested explicitly because Stata ranks missing above
* every number, so "status > 1" would also set attrition to 0 for rows whose
* status is missing.
gen attrition = 0 if status==4
replace attrition = 1 if status==2 | status==3

lab values attrition resyesno
lab var attrition "Attrited after round 1"

order status round?_complete attrition, after(participant_id) 

* Order demographics survey
* Place every demographic variable, grouped by topic, directly after the
* SURVEY_0 divider.
order gender female ///
	age age_18_24 age_25_29 age_30_34 age_35_39 age_40_44 age_45_49 ///
	age_50_54 age_55_59 age_60_64 age_65_above ///
	education education_primary_below education_primary_complete ///
	education_college_incomplete education_college_grad ///
	education_post_secondary education_bachelor_above ///
	employment_status employment_fulltime employment_parttime ///
	employment_selfemployed employment_unemployed employment_parent ///
	employment_student employment_retired ///
	income ///
	marital_status marital_single marital_married marital_divorce_widow ///
	marital_other ///
	children children_none children_one children_two children_three ///
	children_four children_fiveplus children_dummy ///
	country Device, after(SURVEY_0)

* Order match_id data
* Empty string variable used as a divider in front of the match information.
gen MATCH_ID_DATA=""
order MATCH_ID_DATA, before(wife_name)

* MATCH PHILIPPINES variable
* needs to be classified by an RA at the end (contains all sorts of characters)

* RESTRICT ONLY TO ACTUAL PARTICIPANTS
* Keep rows with language "cn" whose participant_id is 60 or above; earlier
* participants were test subjects only.
* (A filter on round1_complete is deliberately not applied here.)
keep if language=="cn" & participant_id>=60

* Parsing useragent (to see what device/browser subjects used)
* Requires the user-written PARSEUAS command; install it beforehand with:
*   ssc install parseuas
* parseuas reads the user-agent string in "browser" and creates browsertype,
* browserversion, OS and hardwaretype.
parseuas browser, browser(browsertype) browserversion(browserversion) os(OS) device(hardwaretype)

* Empty string variable used as a divider in front of the browser data.
gen BROWSER_HARDWARE_DATA=""
order BROWSER_HARDWARE_DATA, before(browser)

* Collapse the parsed hardware categories into three groups. "Device (other)"
* is counted as a desktop (PC/laptop).
replace hardwaretype="1" if inlist(hardwaretype, "Device (other)", "Personal computer (Linux)", "Personal computer (Mac)", "Personal computer (Windows)", "Personal computer (Chrome OS)")
replace hardwaretype="2" if inlist(hardwaretype, "Mobile phone (Android)", "Mobile phone (iPhone)", "Mobile phone (other)")
replace hardwaretype="3" if inlist(hardwaretype, "Tablet (Android)", "Tablet (iPad)")

* Convert the recoded string to numeric and attach the group labels.
destring hardwaretype, replace
label define hardwaretype 1 "Desktop" 2 "Mobile phone" 3 "Tablet"
label values hardwaretype hardwaretype

* Store the merged data as compactly as possible and write the final file.
compress

save "$cleandatapath/HK/merged_data_cn.dta", replace

// some house cleaning
* Delete the intermediate .dta files; only the merged file is kept.
foreach tmpfile in iat_user_dscore participants_activities_cn participants_cn participants_whatsapp_cn phl_demographics_cn phl_photo_names survey {
	erase "$cleandatapath/HK/`tmpfile'.dta"
}
